Skip to content

Commit

Permalink
MP3: Assume an Info header indicates CBR for seeking purposes
Browse files Browse the repository at this point in the history
The seek table in a Xing/Info header is very imprecise (it holds only 100
byte positions in the file, each described with a max resolution of 255).
Seeking using a constant bitrate assumption is more accurate, especially
for longer files (where the imprecision of the Info header is exacerbated).

VBR files should contain a Xing header, while an Info header is
otherwise identical but indicates the file is CBR.

Issue: #878
PiperOrigin-RevId: 597827891
  • Loading branch information
icbaker authored and copybara-github committed Jan 12, 2024
1 parent 5056dfa commit 4061d47
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 100 deletions.
4 changes: 4 additions & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
`ColorInfo.colorSpace`, `ColorInfo.colorTransfer`, and
`ColorInfo.colorRange` values
([#692](https://github.com/androidx/media/pull/692)).
* MP3: Use constant bitrate (CBR) seeking for files with an `Info` header
(the CBR equivalent of the `Xing` header). Previously we used the seek
table from the `Info` header, but this results in less precise seeking
than if we ignore it and assume the file is CBR.
* Audio:
* Video:
* Change the `MediaCodecVideoRenderer` constructor that takes a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -524,11 +524,23 @@ private Seeker maybeReadSeekFrame(ExtractorInput input) throws IOException {
gaplessInfoHolder.encoderDelay = xingFrame.encoderDelay;
gaplessInfoHolder.encoderPadding = xingFrame.encoderPadding;
}
seeker = XingSeeker.create(input.getLength(), xingFrame, input.getPosition());
long startPosition = input.getPosition();
input.skipFully(synchronizedHeader.frameSize);
if (seeker != null && !seeker.isSeekable() && seekHeader == SEEK_HEADER_INFO) {
// Fall back to constant bitrate seeking for Info headers missing a table of contents.
return getConstantBitrateSeeker(input, /* allowSeeksIfLengthUnknown= */ false);
// An Xing frame indicates the file is VBR (so we have to use the seek header for seeking)
// while an Info header indicates the file is CBR, in which case ConstantBitrateSeeker will
// give more accurate seeking than the low-resolution seek table in the Info header. We can
// still use the length from the Info frame if we don't know the stream length directly.
if (seekHeader == SEEK_HEADER_XING) {
seeker = XingSeeker.create(input.getLength(), xingFrame, startPosition);
} else { // seekHeader == SEEK_HEADER_INFO
long streamLength =
xingFrame.dataSize != C.LENGTH_UNSET
? startPosition + xingFrame.dataSize
: C.LENGTH_UNSET;
// TODO: b/319235116 - Consider using the duration derived from the Xing/Info frame when
// it considers encoding delay and padding.
seeker =
getConstantBitrateSeeker(input, streamLength, /* allowSeeksIfLengthUnknown= */ false);
}
break;
case SEEK_HEADER_VBRI:
Expand All @@ -548,11 +560,26 @@ private Seeker maybeReadSeekFrame(ExtractorInput input) throws IOException {
/** Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. */
private Seeker getConstantBitrateSeeker(ExtractorInput input, boolean allowSeeksIfLengthUnknown)
throws IOException {
return getConstantBitrateSeeker(input, C.LENGTH_UNSET, allowSeeksIfLengthUnknown);
}

/**
* Peeks the next frame and returns a {@link ConstantBitrateSeeker} based on its bitrate. {@code
* streamLengthFallback} is used if {@link ExtractorInput#getLength() input.getLength()} is {@link
* C#LENGTH_UNSET}. {@code streamLengthFallback} may also be {@link C#LENGTH_UNSET} to indicate
* the length is unknown.
*/
private Seeker getConstantBitrateSeeker(
ExtractorInput input, long streamLengthFallback, boolean allowSeeksIfLengthUnknown)
throws IOException {
input.peekFully(scratch.getData(), 0, 4);
scratch.setPosition(0);
synchronizedHeader.setForHeaderData(scratch.readInt());
return new ConstantBitrateSeeker(
input.getLength(), input.getPosition(), synchronizedHeader, allowSeeksIfLengthUnknown);
input.getLength() != C.LENGTH_UNSET ? input.getLength() : streamLengthFallback,
input.getPosition(),
synchronizedHeader,
allowSeeksIfLengthUnknown);
}

@EnsuresNonNull({"extractorOutput", "realTrackOutput"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 8359
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 5434
sample count = 26
total output bytes = 5643
sample count = 27
format 0:
sampleMimeType = audio/mpeg
maxInputSize = 4096
Expand All @@ -18,107 +18,111 @@ track 0:
encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0:
time = 344808
time = 339500
flags = 1
data = length 209, hash 34191E1
data = length 209, hash 6CCBBB3B
sample 1:
time = 370930
time = 365622
flags = 1
data = length 209, hash 57323ED7
data = length 209, hash 34191E1
sample 2:
time = 397052
time = 391744
flags = 1
data = length 209, hash 75618CF3
data = length 209, hash 57323ED7
sample 3:
time = 423175
time = 417867
flags = 1
data = length 209, hash 784C973B
data = length 209, hash 75618CF3
sample 4:
time = 449297
time = 443989
flags = 1
data = length 209, hash 49106390
data = length 209, hash 784C973B
sample 5:
time = 475420
time = 470112
flags = 1
data = length 209, hash 70F6A563
data = length 209, hash 49106390
sample 6:
time = 501542
time = 496234
flags = 1
data = length 209, hash 721882B0
data = length 209, hash 70F6A563
sample 7:
time = 527665
time = 522357
flags = 1
data = length 209, hash 81C62AEE
data = length 209, hash 721882B0
sample 8:
time = 553787
time = 548479
flags = 1
data = length 209, hash 16D22463
data = length 209, hash 81C62AEE
sample 9:
time = 579910
time = 574602
flags = 1
data = length 209, hash 47033534
data = length 209, hash 16D22463
sample 10:
time = 606032
time = 600724
flags = 1
data = length 209, hash CECB37A6
data = length 209, hash 47033534
sample 11:
time = 632154
time = 626846
flags = 1
data = length 209, hash 6C9C307B
data = length 209, hash CECB37A6
sample 12:
time = 658277
time = 652969
flags = 1
data = length 209, hash 3EB1A364
data = length 209, hash 6C9C307B
sample 13:
time = 684399
time = 679091
flags = 1
data = length 209, hash 30962500
data = length 209, hash 3EB1A364
sample 14:
time = 710522
time = 705214
flags = 1
data = length 209, hash 2C5CCBB7
data = length 209, hash 30962500
sample 15:
time = 736644
time = 731336
flags = 1
data = length 209, hash F9CB9E37
data = length 209, hash 2C5CCBB7
sample 16:
time = 762767
time = 757459
flags = 1
data = length 209, hash F75BC8C0
data = length 209, hash F9CB9E37
sample 17:
time = 788889
time = 783581
flags = 1
data = length 209, hash D00ED607
data = length 209, hash F75BC8C0
sample 18:
time = 815012
time = 809704
flags = 1
data = length 209, hash B4338395
data = length 209, hash D00ED607
sample 19:
time = 841134
time = 835826
flags = 1
data = length 209, hash E3E838A0
data = length 209, hash B4338395
sample 20:
time = 867256
time = 861948
flags = 1
data = length 209, hash 2B0CF78
data = length 209, hash E3E838A0
sample 21:
time = 893379
time = 888071
flags = 1
data = length 209, hash 31906FA9
data = length 209, hash 2B0CF78
sample 22:
time = 919501
time = 914193
flags = 1
data = length 209, hash C92FC08F
data = length 209, hash 31906FA9
sample 23:
time = 945624
time = 940316
flags = 1
data = length 209, hash 7C89994
data = length 209, hash C92FC08F
sample 24:
time = 971746
time = 966438
flags = 1
data = length 209, hash EC37743B
data = length 209, hash 7C89994
sample 25:
time = 997869
time = 992561
flags = 1
data = length 209, hash EC37743B
sample 26:
time = 1018683
flags = 1
data = length 209, hash C974F6FB
tracksEnded = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 2717
sample count = 13
total output bytes = 2926
sample count = 14
format 0:
sampleMimeType = audio/mpeg
maxInputSize = 4096
Expand All @@ -18,55 +18,59 @@ track 0:
encoderPadding = 1404
metadata = entries=[TSSE: description=null: values=[Lavf58.45.100]]
sample 0:
time = 679168
time = 679125
flags = 1
data = length 209, hash 30962500
data = length 209, hash 3EB1A364
sample 1:
time = 705290
time = 705247
flags = 1
data = length 209, hash 2C5CCBB7
data = length 209, hash 30962500
sample 2:
time = 731412
time = 731369
flags = 1
data = length 209, hash F9CB9E37
data = length 209, hash 2C5CCBB7
sample 3:
time = 757535
time = 757492
flags = 1
data = length 209, hash F75BC8C0
data = length 209, hash F9CB9E37
sample 4:
time = 783657
time = 783614
flags = 1
data = length 209, hash D00ED607
data = length 209, hash F75BC8C0
sample 5:
time = 809780
time = 809737
flags = 1
data = length 209, hash B4338395
data = length 209, hash D00ED607
sample 6:
time = 835902
time = 835859
flags = 1
data = length 209, hash E3E838A0
data = length 209, hash B4338395
sample 7:
time = 862025
time = 861982
flags = 1
data = length 209, hash 2B0CF78
data = length 209, hash E3E838A0
sample 8:
time = 888147
time = 888104
flags = 1
data = length 209, hash 31906FA9
data = length 209, hash 2B0CF78
sample 9:
time = 914270
time = 914227
flags = 1
data = length 209, hash C92FC08F
data = length 209, hash 31906FA9
sample 10:
time = 940392
time = 940349
flags = 1
data = length 209, hash 7C89994
data = length 209, hash C92FC08F
sample 11:
time = 966514
time = 966471
flags = 1
data = length 209, hash EC37743B
data = length 209, hash 7C89994
sample 12:
time = 992637
time = 992594
flags = 1
data = length 209, hash EC37743B
sample 13:
time = 1018716
flags = 1
data = length 209, hash C974F6FB
tracksEnded = true
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ seekMap:
isSeekable = true
duration = 1044875
getPosition(0) = [[timeUs=0, position=227]]
getPosition(1) = [[timeUs=1, position=227]]
getPosition(522437) = [[timeUs=522437, position=4582]]
getPosition(1044875) = [[timeUs=1044875, position=8585]]
getPosition(1) = [[timeUs=0, position=227], [timeUs=26000, position=435]]
getPosition(522437) = [[timeUs=520000, position=4387], [timeUs=546000, position=4595]]
getPosition(1044875) = [[timeUs=1018875, position=8378]]
numberOfTracks = 1
track 0:
total output bytes = 8359
Expand Down

0 comments on commit 4061d47

Please sign in to comment.